# Global knitr chunk options: echo the code, but suppress warnings and
# messages in the rendered document.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)
# attach packages
library(tidyverse)
library(here)
library(sf)
library(tmap)
# Read the tree data; here() assembles the path to
# data/sf_trees/sf_trees.csv from its components.
sf_trees <- here("data", "sf_trees", "sf_trees.csv") %>%
  read_csv()
Example 1: Find counts of observations by “legal_status” and wrangle a bit
# Method 1: group the rows by legal status, then tally the rows in each group
sf_trees %>%
  group_by(legal_status) %>%
  summarise(tree_count = n())
## # A tibble: 10 × 2
## legal_status tree_count
## <chr> <int>
## 1 DPW Maintained 141725
## 2 Landmark tree 42
## 3 Permitted Site 39732
## 4 Planning Code 138.1 required 971
## 5 Private 163
## 6 Property Tree 316
## 7 Section 143 230
## 8 Significant Tree 1648
## 9 Undocumented 8106
## 10 <NA> 54
# Method 2: the count() shortcut, plus a few more wrangling verbs
top_5_status <- sf_trees %>%
  # count() groups and tallies in a single call; the tally is stored in `n`
  count(legal_status) %>%
  # discard the row whose legal_status is NA
  drop_na(legal_status) %>%
  # give the tally column a descriptive name
  rename(tree_count = n) %>%
  # move tree_count to the first column position
  relocate(tree_count) %>%
  # keep the rows holding the 5 largest tree counts
  slice_max(tree_count, n = 5) %>%
  # order from the highest count to the lowest
  arrange(desc(tree_count))
Make a graph of the top 5 legal statuses by tree count
# Horizontal bar chart of the top 5 legal statuses. fct_reorder() orders the
# statuses by tree_count instead of alphabetically.
top_5_status %>%
  ggplot(aes(x = fct_reorder(legal_status, tree_count), y = tree_count)) +
  geom_col(fill = "darkgreen") +
  coord_flip() +
  labs(x = "legal status", y = "tree count") +
  theme_minimal()
Example 2: Only keep observations where legal status is “Permitted Site” and caretaker is “MTA”, and store as permitted_data_df
# Keep only rows where the legal status is "Permitted Site" AND the caretaker
# is "MTA"
permitted_data_df <- sf_trees %>%
  filter(legal_status == "Permitted Site" & caretaker == "MTA")
Example 3: Only keep Blackwood Acacia trees, and then only keep columns “legal_status”, “date”, “latitude”, “longitude”, and store as blackwood_acacia_df
blackwood_acacia_df <- sf_trees %>%
  # str_detect() finds the text anywhere inside the longer species string, so
  # the full value does not need to be typed out
  filter(str_detect(species, "Blackwood Acacia")) %>%
  # keep the four columns of interest ...
  select(legal_status, date, latitude, longitude) %>%
  # ... and shorten the coordinate column names
  rename(lat = latitude, long = longitude)
# Quick scatter plot of the tree coordinates (long on x, lat on y)
blackwood_acacia_df %>%
  ggplot(aes(x = long, y = lat)) +
  geom_point(color = "darkgreen")
Example 4: Use “tidyr::separate()”
# Split the `species` column at the " :: " delimiter into two new columns
sf_trees_sep <- sf_trees %>%
  separate(
    col = species,
    into = c("spp_scientific", "spp_common"),
    sep = " :: "
  )
Example 5: Use “tidyr::unite()”
# Combine tree_id and legal_status into a single `id_status` column, with the
# two values joined by "_COOL_"
ex_5 <- sf_trees %>%
  unite(col = "id_status", tree_id, legal_status, sep = "_COOL_")
Step 1: convert lat/long to spatial points with “st_as_sf()”
# Drop rows that lack a coordinate, then build point geometries from the
# long/lat columns. R also needs to be told which coordinate reference system
# those numbers are in, so EPSG:4326 is supplied directly to st_as_sf().
blackwood_acacia_sf <- blackwood_acacia_df %>%
  drop_na(lat, long) %>%
  st_as_sf(coords = c("long", "lat"), crs = 4326)
# Plot the point geometries
blackwood_acacia_sf %>%
  ggplot() +
  geom_sf(color = "darkgreen")
Step 2: read in the SF shapefile and add map
# Read in the roads shapefile
sf_map <- here("data", "sf_map", "tl_2017_06075_roads.shp") %>%
  read_sf()

# Reproject the roads to EPSG:4326, the CRS assigned to the tree points
sf_map_transform <- sf_map %>%
  st_transform(crs = 4326)
# Draw the reprojected road network on its own
sf_map_transform %>%
  ggplot() +
  geom_sf()
Step 3: combine the maps!
# Combined map: the road network as a thin grey base layer with the Blackwood
# Acacia points drawn on top.
ggplot() +
  # Use the reprojected roads (sf_map_transform) rather than the raw sf_map so
  # that both layers are in EPSG:4326, the CRS the transform was created to
  # match.
  # NOTE(review): on ggplot2 >= 3.4.0 line width is normally controlled by
  # `linewidth`; confirm that `size` still thins the road lines on the
  # installed version.
  geom_sf(
    data = sf_map_transform,
    size = 0.1,
    color = "darkgrey"
  ) +
  geom_sf(
    data = blackwood_acacia_sf,
    size = 0.5,
    color = "darkgreen"
  ) +
  theme_void() +
  labs(title = "Blackwood acacias in SF")
# Switch tmap to its interactive viewing mode
tmap_mode("view")

# tm_shape() supplies the data (the counterpart of ggplot()) and tm_dots()
# draws one dot per point (the counterpart of geom_point())
tm_shape(blackwood_acacia_sf) +
  tm_dots()